/* 
    Purpose: This file brings in the 1940 income scores created
             at various levels in the NBER server and makes
             adjustments at each level of variation 
             for self-employed and farmer income. Adjustments 
             are made according to the Collins and Wanamaker (2017)
             approach.

    Note: All ratios are constructed in the Census subfolder 
          "1940_Adjustments".

    Creates: All output files have the suffix _CWfix.
*/
clear
set more off
cd "$Mydirectory1/1_DataSources/CensusData/output"

****************************
* FATHER INCOME
****************************

*----------------------*
* OCCUPATION ONLY
*----------------------*

    * Load the occupation-level 1940 father income scores. A constant key
    * (number) is created because the "_all" ratio files are merged on it.
    use avgincomes_fathers1940_byocc.dta, clear
    gen number=1

    * Only the two income scores, the occupation code and the merge key are needed
    keep avg_incwage_1940 avg_HHinc_1940 fatheroccej number

    * Indicators for the occupation codes used by the adjustment
    gen laborer = fatheroccej==71
    gen farmer = fatheroccej==81
    gen self_emp = fatheroccej==21
    gen non_self_emp = fatheroccej==28
    * Occupations whose income is modified below (laborer, farmer, self-employed)
    gen imputed_occ_CW = inlist(fatheroccej, 71, 81, 21)

* 1. Adjust farmer income
    merge m:1 number using FarmerRatios_1960_all.dta, nogenerate

    * Work on copies so the unadjusted scores stay available for the checks below
    clonevar avg_person_CWfix = avg_incwage_1940
    clonevar avg_hh_CWfix = avg_HHinc_1940

    /* Account for in-kind income of laborers (factor 1.26).
       Note: See https://data.nber.org/data-appendix/w23395/Appendix_May_2020.pdf
             for more info */
    replace avg_person_CWfix = 1.26*avg_person_CWfix if laborer==1
    replace avg_hh_CWfix = 1.26*avg_hh_CWfix if laborer==1

    /* Farmers receive the (already scaled) laborer income multiplied by
       the ratio of average farmer income to average laborer income. */
    foreach unit in person hh {
        * Spread the laborer row's value to every row via max()
        gen lab_only_`unit' = avg_`unit'_CWfix if laborer==1
        egen lab_ref_`unit' = max(lab_only_`unit')

        replace avg_`unit'_CWfix = lab_ref_`unit' * ratio_farm_`unit'_all if farmer==1

        drop lab_only_`unit' lab_ref_`unit'
    }

* 2. Adjust self-employed income
    merge m:1 number using SelfEmploymentRatios_1960_all.dta, nogenerate

    /* Self-employed respondents receive the non-self-employed income
       multiplied by the ratio of self-employed to not self-employed
       average income (from the 1960 census). */
    foreach unit in person hh {
        * Spread the non-self-employed row's value to every row via max()
        gen ref_only_`unit' = avg_`unit'_CWfix if non_self_emp==1
        egen ref_inc_`unit' = max(ref_only_`unit')

        replace avg_`unit'_CWfix = ref_inc_`unit' * ratio_self_emp_all_`unit' if self_emp==1

        drop ref_only_`unit' ref_inc_`unit'
    }

* Save
    * Remove merged ratios, indicators and the merge key; imputed_occ_CW is kept
    drop ratio* laborer farmer self_emp non_self_emp number

    * Occupations outside the adjusted set must be left untouched
    assert avg_incwage_1940==avg_person_CWfix if imputed_occ_CW==0
    assert avg_HHinc_1940==avg_hh_CWfix if imputed_occ_CW==0

    * Adjusted scores get the _CWfix suffix
    rename avg_person_CWfix avg_incwage_1940_CWfix
    rename avg_hh_CWfix avg_HHinc_1940_CWfix
    label var avg_incwage_1940_CWfix "Average income score, incwage, all, CW fix"
    label var avg_HHinc_1940_CWfix "Average income score, HH income, all, CW fix"

    * Original scores are kept with the _nofix suffix
    rename avg_incwage_1940 avg_incwage_1940_nofix
    rename avg_HHinc_1940 avg_HHinc_1940_nofix

    compress
    save avgincomes_fathers1940_byocc_CWfix.dta, replace

**---------------------------------------------------------------------------**
**---------------------------------------------------------------------------**
    
*-----------------------*
* NOW MORE GRANULAR LEVELS 
*-----------------------*

foreach group in byrace byrace_bysouth byrace_bysouth_altwgt byrace_bysouth_byedu byrace_byregion  {

    noisily display "`group'"

    * Reset the per-group settings so that a group without a configuration
    * branch cannot silently reuse the values of the previous iteration.
    foreach setting in level var1 var2 rationame1 rationame2 title {
        local `setting' ""
    }

    * Per-group configuration:
    *   level       merge keys / by-group variables
    *   var1, var2  person-level (incwage) and household income score variables
    *   rationame1  prefix of the farmer ratio variables
    *   rationame2  prefix of the self-employment ratio variables
    *   title       short tag used in the output variable names
    if "`group'"=="byrace" {
        local level "race"
        local var1 "avg_incwage_1940_byrace" 
        local var2 "avg_HHinc_1940_byrace" 
        local rationame1 "ratio_farm"
        local rationame2 "ratio_self_emp"
        local title "byrace"
    }
    else if "`group'"=="byrace_bysouth" {
        local level "race south_merge"
        local var1 "avgincwage_1940_byrace_bysouth" 
        local var2 "avg_HHinc_1940_byrace_bysouth"  
        local rationame1 "ratio_farm_south"
        local rationame2 "ratio_self_emp_south"
        local title "byr_bys"
    }
    else if "`group'"=="byrace_bysouth_altwgt" {
        local level "race south_merge"
        local var1 "avgincwage_1940_byr_bys_altwgt"
        local var2 "avg_HHinc_1940_byr_bys_altwgt"  
        local rationame1 "ratio_farm_south"
        local rationame2 "ratio_self_emp_south"
        local title "byr_bys_altwgt"
    }
    else if "`group'"=="byrace_bysouth_byedu" {
        local level "race south_merge edu"
        local var1 "avgincwage_1940_byr_south_edu" 
        local var2 "avg_HHinc_1940_byr_south_edu" 
        local rationame1 "ratio_farm_south_edu"
        local rationame2 "ratio_self_emp_south_edu"
        local title "byr_bys_edu"
    }
    else if "`group'"=="byrace_byregion" {
        local level "race region_merge"
        local var1 "avgincwage_1940_byrace_byregion" 
        local var2 "avg_HHinc_1940_byrace_byregion" 
        local rationame1 "ratio_farm_region"
        local rationame2 "ratio_self_emp_region"
        local title "byr_byreg"
    }
    else if "`group'"=="byrace_byregion_byedu" {
        * NOTE(review): this group is not in the loop list above, so this
        * branch is currently never executed. Kept for when it is re-enabled.
        local level "race region_merge edu"
        local var1 "avgincwage_1940_byr_byreg_byedu" 
        local var2 "avg_HHinc_1940_byr_byreg_byedu" 
        local rationame1 "ratio_farm_region_edu"
        local rationame2 "ratio_self_emp_region_edu"
        local title "byr_reg_edu"
    }

    * Stop with an error rather than run with an empty configuration
    if "`level'"=="" {
        display as error "no configuration defined for group `group'"
        exit 198
    }

    * Extra variables to keep: only byrace_bysouth retains its number* variables
    local cond1 ""
    if "`group'"=="byrace_bysouth" {
        local cond1 "number*"
    }

    * The alternative-weight scores are stored in the byrace_bysouth income
    * file and use the byrace_bysouth ratio files, so the file stem differs
    * from the group name only for that group.
    local src "`group'"
    if "`group'"=="byrace_bysouth_altwgt" {
        local src "byrace_bysouth"
    }

* Bring in right dataset
    use avgincomes_fathers1940_`src'.dta, clear

    * The region key is renamed so that it matches the merge key in level
    if "`group'"=="byrace_byregion" {
        rename region region_merge
    }

    * Keep relevant measures
    noisily display "keep relevant variables"
    keep `var1' `var2' fatheroccej `level' `cond1'
    
    * Indicators for the occupation codes used by the adjustment
    gen laborer = fatheroccej==71
    gen farmer = fatheroccej==81
    gen self_emp = fatheroccej==21
    gen non_self_emp = fatheroccej==28
    gen imputed_occ_CW = laborer==1 | farmer==1 | self_emp==1
    
* 1. Adjust farmer income
    noisily display "fix farmers"
    * NOTE(review): the merge result is not checked here; confirm that every
    * cell is expected to find a ratio, as the mothers loop asserts.
    merge m:1 `level' using FarmerRatios_1960_`src'.dta
    drop _merge
    
    * Work on copies so the unadjusted scores stay available for the checks below
    clonevar avg_person_CWfix = `var1'
    clonevar avg_hh_CWfix = `var2'
    
    //Account for in-kind income of laborers 
        /*Note: See https://data.nber.org/data-appendix/w23395/Appendix_May_2020.pdf
                for more info */
    foreach var of varlist avg_person_CWfix avg_hh_CWfix {
        replace `var' = 1.26*`var' if laborer==1 
    }

    /*Assign farmers laborer income * ratio 
      of average farmer income to average
      laborer income. Ratio changes with 
      the level of variation. */
    foreach var in person hh {
        * Spread the laborer value to every row of the same cell via max()
        gen temp_`var' = avg_`var'_CWfix if laborer==1
        bysort `level': egen laborer_inc_`var' = max(temp_`var') 
        
        replace avg_`var'_CWfix = laborer_inc_`var' * `rationame1'_`var' if farmer==1 
    }

    drop temp* laborer_inc* 
    
* 2. Adjust self-employed income
    noisily display "fix self-employed"
    merge m:1 `level' using SelfEmploymentRatios_1960_`src'.dta
    drop _merge
    
    foreach var in person hh {   
        * Spread the non-self-employed value to every row of the same cell
        gen temp_`var' = avg_`var'_CWfix if non_self_emp==1
        bysort `level': egen income_`var' = max(temp_`var') 
        
        /* Give self-employed respondents 
           non-self-employed income * ratio 
           of self-employed to not self-employed
           average income (from the 1960 census).
           Ratio changes with the level of variation. */
        replace avg_`var'_CWfix = income_`var' * `rationame2'_`var' if self_emp==1 
    }
  
* Save  
    drop temp* income_* ratio* laborer farmer self_emp non_self_emp 
    
    * Occupations outside the adjusted set must be left untouched
    assert `var1'==avg_person_CWfix if imputed_occ_CW==0
    assert `var2'==avg_hh_CWfix if imputed_occ_CW==0
    
    rename avg_person_CWfix avg_incwage_`title'_CWfix 
    label var avg_incwage_`title'_CWfix "Average income score, incwage, `group', CW fix"
    
    rename avg_hh_CWfix avg_HHinc_`title'_CWfix
    label var avg_HHinc_`title'_CWfix "Average income score, HH income, `group', CW fix"
    
    compress
    save avgincomes_fathers1940_`group'_CWfix.dta, replace

}

**---------------------------------------------------------------------------**
**---------------------------------------------------------------------------**
    
****************************
* MOTHER INCOME
****************************

*---------------------------------*
* OCC X RACE X SOUTH (BASELINE)
*---------------------------------*

    * Settings for the baseline mothers file (occupation x race x south)
    local group "byrace_bysouth"
    local title "byr_bys"
    local level "race south_merge"  
    local var1 "avgincwage_1940_byrace_bysouth" 
    local var2 "avg_HHinc_1940_byrace_bysouth" 
    local rationame1 "ratio_farm_byr_bys"
    local rationame2 "ratio_self_emp_byr_bys"

    use avgincomes_mothers1940_byrace_bysouth.dta, clear

    * Only the two income scores, the occupation code and the merge keys are needed
    keep `var1' `var2' motheroccej `level'

    * Indicators for the occupation codes used by the adjustment
    gen laborer = motheroccej==71
    gen farmer = motheroccej==81
    gen self_emp = motheroccej==21
    gen non_self_emp = motheroccej==28
    * Occupations whose income is modified below (laborer, farmer, self-employed)
    gen imputed_occ_CW = inlist(motheroccej, 71, 81, 21)

* 1. Adjust farmer income
    merge m:1 `level' using FarmerRatios_1960_mothers_`group'.dta, nogenerate

    * Work on copies so the unadjusted scores stay available for the checks below
    clonevar avg_person_CWfix = `var1'
    clonevar avg_hh_CWfix = `var2'

    /* Account for in-kind income of laborers (factor 1.26).
       Note: See https://data.nber.org/data-appendix/w23395/Appendix_May_2020.pdf
             for more info */
    replace avg_person_CWfix = 1.26*avg_person_CWfix if laborer==1
    replace avg_hh_CWfix = 1.26*avg_hh_CWfix if laborer==1

    /* Farmers receive the (already scaled) laborer income of their cell
       multiplied by the ratio of average farmer income to average laborer
       income. The ratio varies with the level of variation. */
    foreach unit in person hh {
        * Spread the laborer value to every row of the same cell via max()
        gen lab_only_`unit' = avg_`unit'_CWfix if laborer==1
        bysort `level': egen lab_ref_`unit' = max(lab_only_`unit')

        replace avg_`unit'_CWfix = lab_ref_`unit' * `rationame1'_`unit' if farmer==1

        drop lab_only_`unit' lab_ref_`unit'
    }

* 2. Adjust self-employed income
    merge m:1 `level' using SelfEmploymentRatios_1960_mothers_`group'.dta, nogenerate

    /* Self-employed respondents receive the non-self-employed income of
       their cell multiplied by the ratio of self-employed to not
       self-employed average income (from the 1960 census). The ratio
       varies with the level of variation. */
    foreach unit in person hh {
        * Spread the non-self-employed value to every row of the same cell
        gen ref_only_`unit' = avg_`unit'_CWfix if non_self_emp==1
        bysort `level': egen ref_inc_`unit' = max(ref_only_`unit')

        replace avg_`unit'_CWfix = ref_inc_`unit' * `rationame2'_`unit' if self_emp==1

        drop ref_only_`unit' ref_inc_`unit'
    }

*Save
    * Remove merged ratios and indicators; imputed_occ_CW is kept
    drop ratio* laborer farmer self_emp non_self_emp

    * Occupations outside the adjusted set must be left untouched
    assert `var1'==avg_person_CWfix if imputed_occ_CW==0
    assert `var2'==avg_hh_CWfix if imputed_occ_CW==0

    * Adjusted scores are stored under mom_*_CWfix names
    rename avg_person_CWfix mom_incwage_`title'_CWfix 
    rename avg_hh_CWfix mom_HHinc_`title'_CWfix
    label var mom_incwage_`title'_CWfix "Average income score for mothers, incwage, `group', CW fix"
    label var mom_HHinc_`title'_CWfix "Average income score for mothers, HH income, `group', CW fix"

    compress
    save avgincomes_mothers1940_`group'_CWfix.dta, replace

*------------------------------------------------*
* OTHER LEVELS (occ, occ x race, occ x south)
*------------------------------------------------*
  
foreach group in byocc byocc_byr byocc_bys {

    use avgincomes_mothers1940_`group'.dta, clear

    * Reset the per-group settings so that a group without a configuration
    * branch cannot silently reuse the values of the previous iteration.
    foreach setting in level var1 rationame1 rationame2 file_name {
        local `setting' ""
    }

    * Per-group configuration:
    *   level       merge key / by-group variable
    *   var1        household income score variable
    *   rationame1  farmer ratio variable
    *   rationame2  self-employment ratio variable
    *   file_name   suffix of the ratio files to merge
    if "`group'"=="byocc" {
        * Constant merge key: the "all" ratio files are merged on number
        gen number=1
        local level "number"
        local var1 "HHinc_1940_byocc" 
        local rationame1 "ratio_farm_hh_all"
        local rationame2 "ratio_self_emp_all_hh"
        local file_name "all"
    }
    else if "`group'"=="byocc_byr" {
        local level "race"
        local var1 "HHinc_1940_byocc_byr" 
        local rationame1 "ratio_farm_hh"
        local rationame2 "ratio_self_emp_hh"
        local file_name "byrace"
    }   
    else if "`group'"=="byocc_bys" {
        local level "south_merge"
        local var1 "HHinc_1940_byocc_bys" 
        local rationame1 "ratio_farm_south_hh"
        local rationame2 "ratio_self_emp_south_hh"
        local file_name "bysouth"
    }       

    * Stop with an error rather than run with an empty configuration
    if "`level'"=="" {
        display as error "no configuration defined for group `group'"
        exit 198
    }
 
    * Indicators for the occupation codes used by the adjustment
    gen laborer = motheroccej==71
    gen farmer = motheroccej==81
    gen self_emp = motheroccej==21
    gen non_self_emp = motheroccej==28
    gen imputed_occ_CW = laborer==1 | farmer==1 | self_emp==1
    
* Keep relevant measures
    * The indicators are listed explicitly so that this line depends neither
    * on variable-name abbreviation nor on the order in which they were created.
    keep `var1'  motheroccej `level' laborer farmer self_emp non_self_emp imputed_occ_CW
    
    * Work on a copy so the unadjusted score stays available for the check below
    clonevar avg_hh_CWfix = `var1'
    
* 1. Adjust farmer income
    * assert(3): every row of both files must match, otherwise the merge errors
    merge m:1 `level' using FarmerRatios_1960_mothers_`file_name'.dta, assert(3) nogen
        
    //Account for in-kind income of laborers 
        /*Note: See https://data.nber.org/data-appendix/w23395/Appendix_May_2020.pdf
                for more info */
    replace avg_hh_CWfix = 1.26*avg_hh_CWfix if laborer==1 

    /*Assign farmers laborer income * ratio 
      of average farmer income to average
      laborer income. Ratio changes with 
      the level of variation. */
    * Spread the laborer value to every row of the same cell via max()
    gen temp_hh = avg_hh_CWfix if laborer==1
    bysort `level': egen laborer_inc_hh = max(temp_hh) 
    
    * Show the merged ratio values in the log
    tab `rationame1'
    replace avg_hh_CWfix = laborer_inc_hh * `rationame1' if farmer==1 

    drop temp* laborer_inc* 
    
* 2. Adjust self-employed income
    merge m:1 `level' using SelfEmploymentRatios_1960_mothers_`file_name'.dta, assert(3) nogen
    
    * Spread the non-self-employed value to every row of the same cell
    gen temp_hh = avg_hh_CWfix if non_self_emp==1
    bysort `level': egen income_hh = max(temp_hh) 
    
    /* Give self-employed respondents 
       non-self-employed income * ratio 
       of self-employed to not self-employed
       average income (from the 1960 census).
       Ratio changes with the level of variation. */
    * Show the merged ratio values in the log
    tab `rationame2'
    replace avg_hh_CWfix = income_hh * `rationame2' if self_emp==1 
    
* Save
    sort `level'
    drop temp* income_* ratio* laborer farmer self_emp non_self_emp 
    * number exists only for the byocc group
    cap drop number
    
    * Occupations outside the adjusted set must be left untouched
    assert `var1'==avg_hh_CWfix if imputed_occ_CW==0

    rename avg_hh_CWfix mom_HHinc_CWfix_`group'
    label var mom_HHinc_CWfix_`group' "Average income score for mothers, HH income, `group', CW fix"
    
    compress
    save avgincomes_mothers1940_`group'_CWfix.dta, replace
    
    }
